Creating Pandas DataFrames

Create a Pandas DataFrame From a CSV File


In [ ]:
# Import the Python libraries we need
import os

import pandas as pd

In [ ]:
# Define a variable for the accidents data file.
# The default below is the original author's local path. Set the
# ACCIDENTS_DATA_FILE environment variable to point at your own copy of
# Accidents7904.csv instead of editing this cell.
accidents_data_file = os.environ.get(
    'ACCIDENTS_DATA_FILE',
    '/Users/robert.dempsey/Dropbox/Private/Art of Skill Hacking/'
    'Books/Python Business Intelligence Cookbook/Data/Stats19-Data1979-2004/Accidents7904.csv'
)

Import the entire accidents dataset


In [ ]:
# Load the complete accidents CSV into a DataFrame and preview the first rows.
# Requires pandas >= 1.3 for `on_bad_lines`.
accidents = pd.read_csv(accidents_data_file,
                        sep=',',
                        header=0,           # the first row holds the column names
                        index_col=False,    # do not use the first column as the index
                        parse_dates=True,
                        # Skip malformed rows but emit a warning for each one.
                        # `on_bad_lines` replaces the removed `error_bad_lines` /
                        # `warn_bad_lines` pair; the removed `tupleize_cols`
                        # option is dropped as well.
                        on_bad_lines='warn',
                        skip_blank_lines=True,
                        low_memory=False    # parse in one pass to avoid mixed-type inference
                        )
accidents.head()

Import the first 1000 rows of the accidents dataset


In [ ]:
# Load only the first 1000 rows, using the first column of the CSV file as the index.
# Requires pandas >= 1.3 for `on_bad_lines`.
accidents = pd.read_csv(accidents_data_file,
                        sep=',',
                        header=0,           # the first row holds the column names
                        index_col=0,
                        parse_dates=True,   # ask pandas to try parsing the index as dates
                        # Raise on malformed rows. `on_bad_lines` replaces the
                        # removed `error_bad_lines` / `warn_bad_lines` pair; the
                        # removed `tupleize_cols` option is dropped as well.
                        on_bad_lines='error',
                        skip_blank_lines=True,
                        nrows=1000
                        )
accidents.head()

Create a Pandas DataFrame From an Excel File


In [ ]:
# Import the Python libraries we need
import pandas as pd

In [ ]:
# Path to the Excel workbook that holds the customer data
customer_data_file = "data/customer_data.xlsx"

In [ ]:
# Create a dataframe from the first sheet of the Excel file and preview it.
# `sheet_name` is the current spelling of the keyword; the older `sheetname`
# has been removed from pandas.
customers = pd.read_excel(customer_data_file,
                          sheet_name=0,          # first worksheet in the workbook
                          header=0,              # the first row holds the column names
                          index_col=False,
                          keep_default_na=True   # treat pandas' default NA markers as missing
                         )
customers.head()

In [ ]:
# Create a JSON file from the DataFrame for the next recipe.
# Each row is written as one JSON object (orient='records'). Dates are written
# as ISO 8601 strings because the next recipe expects ISO-formatted dates; the
# default for this orient would be epoch milliseconds.
customers.to_json('data/customer_data.json', orient='records', date_format='iso')

Create a Pandas DataFrame From a JSON File


In [ ]:
# Import the Python libraries we need
import pandas as pd

In [ ]:
# Path to the JSON file holding the customer records
customer_json_file = "data/customer_data.json"

In [ ]:
# Read the customer records from the JSON file into a DataFrame and preview it.
# NOTE: this recipe relies on the dates in the JSON file being stored in ISO format.
customers_json = pd.read_json(customer_json_file, convert_dates=True)
customers_json.head()

In [ ]: